********************************************************************************
*** 7biocoal.do**********************************************************
*** last change: 2023/05/01 (LH)************************************************
********************************************************************************	
*
* First run 00master, 1prepare and 5estimations.
*
/* ------------------------------------------------------------------------ */
/* Contents of this file -	Figures for Haywood, Janser, Koch paper - */
/* ------------------------------------------------------------------------ */
	
	* (0) Prepare data : define additional variables for characteristics and transitions

	*** FIGURES ***
	
	* Figure 1 - Abs number of lignite mining & services employees per mining region
	* [ Figure 2 - Wage distributions produced in 5_estimations ]	
	* Figure 3 - projection of welfare costs and workforces from Bestimcell.m/Asimulage.m]
	* Figure 4 - pre- and post-imputation wages
	* Figure 5 - Mean age of employees
	* => Export output in one excel file

	*** TABLES ***
	
	* Table 1 - Descriptive Statistics
	* [Table 2 - produced by 5estimations ]


*** TABULA RASA ***
clear all

*** FILENAME & FILE-SPECIFIC GLOBALS ***
glo filename 	7biocoal
glo date 		= string( d(`c(current_date)'), "%tdCYND" )

*** SETTINGS ***
set more off 
set linesize 200        
set rmsg on            
set maxvar 32767, permanently
set excelxlsxlargefile on 

*** START LOG-FILE ***
if ${log_active}==1 {
	mac list date
	capture log close
	log using ${log}/${filename}_${sample}_${date}.log, replace text
	}

*** OPEN DATA FILE ***

use ${data}\precoll.dta, clear
*
* We use precoll, produced by 5estimations - based on postprep_2
*   (postprep_2 means that for the production of these results, use sample 2
* 	(see datadoc - alternative: use only one series of employment biography 
* 	(postprep_3, postprep_4) or not impute black holes as own spells with status (postprep_1)
* 
cap drop lignite
gen lignite=thisspelllignite
label var lignite "this spell is lignite"
*rename persnr pid
cap rename wz08_kons_num wz08


/* ------------------------------------------------------------------------ */
* Figures & empirical evidence used in paper
/* ------------------------------------------------------------------------ */	

* note that the sum of individuals in different cells may be higher, as some
* individuals may contribute to several cells.
	
	* not chronological in order of paper: Table 1 is below...
	
/* ------------------------------------------------------------------------ */
 * 	Figure 1 - "Abs number of lignite mining & services employees per mining region"
/* ------------------------------------------------------------------------ */		
	forvalues t = 1975/$maxyear {
		di "Tabulate size of lignite industry across different mining areas in year `t'"
		estpost tab mining_area if mdy(06,30,`t') >= begepi & mdy(06,30,`t') <= endepi & thisspelllignite == 1
		estimates store y`t'
	}
	esttab y* using results/${samplefolder}/7b_empareas.csv, label plain replace 
	estimates clear	
	
/* ------------------------------------------------------------------------ */
 * 	Figure 2 - "Absolute number of employees in relevant industry sectors"
					*  - "Number of employees in lignite mining and mining services"
/* ------------------------------------------------------------------------ */
	forvalues t = 1975/$maxyear {
		di "Differentiate number of employees in lignite mining and mining services in year `t'"
		tab thisspelllig_cat if mdy(06,30,`t') >= begepi & mdy(06,30,`t') <= endepi & thisspelllignite == 1
		estpost tab thisspelllig_cat if mdy(06,30,`t') >= begepi & mdy(06,30,`t') <= endepi & thisspelllignite == 1
		estimates store y`t'
	}
	esttab y* using results/${samplefolder}/7b_empsector.csv, label plain replace
	estimates clear
	
/* ------------------------------------------------------------------------ */
 * 	Figure 4 - Coal-mining retirement age across decades
/* ------------------------------------------------------------------------ */	
	forvalues t = 1/4 {
		di "Lignite employees' age at retirement for all mining areas by decade"
		tab agepotret if pretrans == 1 & decade == `t' & thisspelllignite == 1
		estpost tab agepotret if pretrans == 1 & decades == `t' & thisspelllignite == 1
		estimates store y`t'
	}		
	esttab y* using results/${samplefolder}/7b_ageret.csv, label plain replace
	estimates clear
	
/* ------------------------------------------------------------------------ */
 * 	Figure 5 - Mean age of employees
/* ------------------------------------------------------------------------ */	
	forvalues i = 1/5 {
		forvalues t = 1975/$maxyear {
			cap drop age
			gen age = `t' - year(geb_dat)	
			cap drop area*meanage 
			cap egen area`i'meanage`t'=mean(age) if mdy(06,30,`t') >= begepi & mdy(06,30,`t') <= endepi & thisspelllignite == 1 & mining_area==`i'
			cap estimates store y`t'_`i'
		}
	}
	estpost sum area* 
	
	esttab using results/${samplefolder}/7b_meanage.csv, cells("mean")  nomtitle nonumber append
	estimates clear

	
/* ------------------------------------------------------------------------ */
 * 	Export output in one excel file
/* ------------------------------------------------------------------------ */	
	preserve
		import delimited using results/${samplefolder}/7b_empareas.csv,  stripquotes(yes) delimiters(",") bindquotes(nobind) clear
		export excel using results/${samplefolder}/7b_BiocoalFiguresForPaper.xlsx, sheet("empareas") replace
		import delimited using results/${samplefolder}/7b_empsector.csv,  stripquotes(yes) delimiters("=") bindquotes(nobind)  varnames(1) clear
		export excel using results/${samplefolder}/7b_BiocoalFiguresForPaper.xlsx, sheet("empsector") sheetmodify	
		import delimited using results/${samplefolder}/7b_ageret.csv,  stripquotes(yes) delimiters(",") bindquotes(nobind) clear
		export excel using results/${samplefolder}/7b_BiocoalFiguresForPaper.xlsx, sheet("ageret") sheetmodify
		import delimited using results/${samplefolder}/7b_meanage.csv,  stripquotes(yes) delimiters(",") bindquotes(nobind) clear
		export excel using results/${samplefolder}/7b_BiocoalFiguresForPaper.xlsx, sheet("meanage") sheetmodify
	restore	
	
* ------------------------------------------------------------------------ *
*  Table 1 - Comparing descriptive stats across samples
* ------------------------------------------------------------------------ *	

******************************************************************
***  We have three datasets:
** (a) Whole sample = precoll (already loaded here)
** (b) precoll but only coal workers
** (c) wage sample used to calculate wcoal
** (d) wage sample used to calculate wnc
***  => report descriptive statistics for three datasets ***
*********************************************************
save delme, replace

* New for JAERE August 2022 - distinction between the two wage samples used to 
* calculate wcoal and wnc 

local i = 1
while `i' < 5 { 
if i==1
*first run descriptive: run analysis on data already loaded
disp("first column: whole sample")

if `i'==2{
* if i=2, second run: only coal spells
* reload dataset to generate newly the macro conditions of coal spells
disp("second column: only coal spells")
use ${data}/precoll.dta, clear
drop if thisspelllignite==0
}

if `i'==3{
* if i=3, third run: only sample used to calculate coal wage distribution
	if ${sample}<7{
		use ${data}\wage_sample_wcoal_estimation.dta, clear
	}
	if ${sample}==7{
		use ${data}\wage_sample7_wcoal_estimation.dta, clear	
		}
	if ${sample}==8{
		use ${data}\wage_sample8_wcoal_estimation.dta, clear
	}
	if ${sample}==9{
		use ${data}\wage_sample9_wcoal_estimation.dta, clear
	}

disp("second column: only wage spells used for coal wage distribution")
keep if (wcoal_end_monthly==.)
}

if `i'==4{
* if i=4, fourth run: only sample used to calculate non-coal wage distribution
use ${data}\wage_sample_wnc_estimation.dta, clear
disp("second column: only wage spells used for non-coal wage distribution")
keep if (wcoal_end_monthly==. | wnc_offer_tentg_beg==.)
}
*** total number of spells / number of lignite spells / size wage sample
disp("total number of spells")
count

*** count how many individuals contribute to estimation
g delfirstpid=.
bys pid: replace delfirstpid=(_n==1)
disp("distinct individuals contributing to estimation")
count if (delfirstpid==1)
drop delfirstpid

*** Define variables for descriptive statistics

	* (6.1) Categorical age variable agecat2 already defined
	
	* (6.2) Categorical edu variable educat already defined
			
	* (6.3) Create resid_area as generalisation of mining area for non-coal jobs
			cap drop livarea
			g livarea=.
			* Mining Area 1: Lausitz
			replace livarea = 1 if ao_kreis == 12052 // Stadt Cottbus (Brandenburg)
			replace livarea = 1 if ao_kreis == 12061 // LK Dahme-Spreewald (Brandenburg)
			replace livarea = 1 if ao_kreis == 12062 // LK Elbe-Elster (Brandenburg)
			replace livarea = 1 if ao_kreis == 12066 // LK Oberspreewald-Lausitz (Brandenburg)
			replace livarea = 1 if ao_kreis == 12071 // LK Spree-Neisse (Brandenburg)
			replace livarea = 1 if ao_kreis == 14625 // LK Bautzen (Sachsen)
			replace livarea = 1 if ao_kreis == 14626 // LK Goerlitz (Sachsen)
			* Mining Area 2: Mitteldeutsches Revier
			replace livarea = 2 if ao_kreis == 14713 // Stadt Leipzig (Sachsen)
			replace livarea = 2 if ao_kreis == 14729 // LK Leipzig (Sachsen)
			replace livarea = 2 if ao_kreis == 14730 // LK Nordsachsen (Sachsen)
			replace livarea = 2 if ao_kreis == 15002 // Stadt Halle(Saale)
			replace livarea = 2 if ao_kreis == 15082 // LK Anhalt-Bitterfeld (Sachsen-Anhalt)
			replace livarea = 2 if ao_kreis == 15084 // LK Burgenlandkreis (Sachsen-Anhalt)
			replace livarea = 2 if ao_kreis == 15087 // LK Mansfeld-Südharz (Sachsen-Anhalt)
			replace livarea = 2 if ao_kreis == 15088 // LK Saalekreis (Sachsen-Anhalt)
			replace livarea = 2 if ao_kreis == 15091 // LK Wittenberg (Sachsen-Anhalt)
			replace livarea = 2 if ao_kreis == 16077 // LK Altenburger Land (Thüringen)
			replace livarea = 2 if ao_kreis == 16075 // LK Saale-Orla-Kreis (Thüringen)
			* Mining Area 3: Rheinisches Revier
			replace livarea = 3 if ao_kreis == 05112 // Stadt Duisburg (NRW)
			replace livarea = 3 if ao_kreis == 05162 // LK Rhein-Kreis Neuss (NRW)
			replace livarea = 3 if ao_kreis == 05315 // Stadt Köln (NRW)
			replace livarea = 3 if ao_kreis == 05334 // LK Städteregion Aachen (NRW)
			replace livarea = 3 if ao_kreis == 05362 // LK Rhein-Erft-Kreis (NRW)
			replace livarea = 3 if ao_kreis == 05358 // LK Düren (NRW)
			replace livarea = 3 if ao_kreis == 05366 // LK Euskirchen (NRW)
			* Mining Area 4: Other Coal areas - (first two are Helmstedter Revier)
			replace livarea = 4 if ao_kreis == 03154 // LK Helmstedt (Niedersachsen)
			replace livarea = 4 if ao_kreis == 15083 // LK Börde (Sachsen-Anhalt)
			replace livarea = 4 if ao_kreis == 03153 // LK Goslar
			replace livarea = 4 if ao_kreis == 03241 // Region Hannover
			replace livarea = 4 if ao_kreis == 05570 // LK Warendorf
			replace livarea = 4 if ao_kreis == 06440 // LK Wetteraukreis
			replace livarea = 4 if ao_kreis == 06611 // Stadt Kassel
			replace livarea = 4 if ao_kreis == 06634 // LK Schwalm-Eder-Kreis
			replace livarea = 4 if ao_kreis == 06636 // LK Werra-Meißner-Kreis
			replace livarea = 4 if ao_kreis == 09162 // Stadt München
			replace livarea = 4 if ao_kreis == 09376 // LK Schwandorf
			replace livarea = 4 if ao_kreis == 09672 // LK Bad Kissingen
			replace livarea = 4 if ao_kreis == 10041 // Regionalverband Saarbrücken
			replace livarea = 4 if ao_kreis == 10044 // LK Saarlouis
			replace livarea = 4 if ao_kreis == 11000 // Stadt Berlin
			replace livarea = 4 if ao_kreis == 13073 // LK Vorpommern-Rügen
			replace livarea = 4 if ao_kreis == 14521 // LK Erzgebirgskreis	
			* Non-Mining Areas 5: Other undefined Reviere
			replace livarea = 5 if livarea == . & ao_kreis !=.
			label define livareas 1 "Lusatia" 2 "Central Germany" 3 "Rhineland" 4 "Other coal area" 5 "Other non-coal areas"
			label val livarea areas

	* (6.4) Create macroeconomic conditions

	* Note that the cell-based approach first cuts spells which span across different macroeconomic conditions. 
	* Here we do not do this.
			
		g macro=.

		* bad macro conditions = 1 / good macro conditions = 2
		* approximations here
		* (1) good macro is less than 10% unemployment
		* (2) 	Lausitz:    from (incl) 2015-
		* 		Rheinisches from (incl) 2007 -
		*       Helmstedter from (incl) 2008 - 
		* 		Mitteldeutsches from (incl) 2016 -
		replace macro=1 if mining_area== 1 & endepi<mdy(01,01,2015)
		replace macro=1 if mining_area== 2 & endepi<mdy(01,01,2016)
		replace macro=1 if mining_area== 3 & endepi<mdy(01,01,2008)
		replace macro=1 if mining_area== 4 & endepi<mdy(01,01,2007)
		
		replace macro=2 if mining_area== 1 & endepi>=mdy(01,01,2015)
		replace macro=2 if mining_area== 2 & endepi>=mdy(01,01,2016)
		replace macro=2 if mining_area== 3 & endepi>=mdy(01,01,2008)
		replace macro=2 if mining_area== 4 & endepi>=mdy(01,01,2007)

		* West-Germany: Unemployment below 10% in all years since 1990 
		replace macro=2 if (mining_area==. | mining_area==5) & ao_bula<12

		* East Germany: Unemployment below 10% for women since 2012, for men since 2015
		replace macro=1 if (mining_area==. | mining_area==5) & ao_bula>=12 & endepi<mdy(01,01,2012) & frau==1
		replace macro=1 if (mining_area==. | mining_area==5) & ao_bula>=12 & endepi<mdy(01,01,2014) & frau==0
		replace macro=2 if (mining_area==. | mining_area==5) & ao_bula>=12 & endepi>=mdy(01,01,2012) & frau==1
		replace macro=2 if (mining_area==. | mining_area==5) & ao_bula>=12 & endepi>=mdy(01,01,2014) & frau==0
		cap label drop macroLAB
		label define macroLAB 1 "hi-unemp" 2 "lo-unemp" 
		label values macro macroLAB

* produce tables of the descriptive stats for the three datasets

tab agecat2,m
tab frau,m
tab educ2,m
tab livarea, m
tab macro,m
tab status, m
tab beruf12, m

**********************************************************************
******
* JAERE December 2022 - industry sectors after coal
* => see 5estimations section (2); section (9.5.1)

	* JAERE - R2.MC7 - Distribution of length of spells in lignite
	* new january 2023
	disp("distribution of durations of this sample")
	su dur, det
	disp("distribution of durations in lignite of this sample")
	su dur if thisspelllignite==1, det
	disp("distribution of durations in lignite & statsimple==1 of this sample")
	su dur if thisspelllignite==1 & statsimple==1, det
	disp("")
	su dur if thisspelllignite==0 & statsimple==1, det

	* test the specific criterion for inclusion 6 months in lignite mining
	count if dur<=180 
	* distributions for durations below 3651 days
	*			below 1000 days, below 500 days, below 365 days

local durations 181 366 500 1000 3700 7300
foreach d of local durations {
		cap histogram dur if statsimple==1 & dur<=`d'
		cap graph save dur`d'sample${sample}run`i'.gph, replace
		cap graph export dur`d'sample${sample}run`i'.png, replace
	}
	
**********************************************************************
local i = `i' + 1
}		
* Table 2 - parameter estimates: 
*   see   TransitionParameters_MainResults.xlsx
*   	& cellresults.xlsx
*		& 5_sample2_wages_distribution.xlsx
*       (all from 5estimations.do) 

* back out of loop, reload data so as not to remain in wage_sample
use delme, clear

		
** CLOSE LOG-FILE **	
if ${log_active}==1 {$
	capture log close
	}

